This analysis looks at the field data collected for the following 2017 MLRA project: - MLRA 111B - Glynwood B-slope Erosion; Northeastern IN
Spatially disaggregate the existing SSURGO polygons for Glynwood B-slope map units using ArcSIE, in order to separate different soil erosion phases. The current SSURGO maps join issues at the SSA boundaries, due to different erosion phases. This project is deemed relevant due to the current interest in Soil Health. Distinguishing the difference in erosion phases may have minimal impact on the majority of soil interpretations, but is believed to be significant in distinguishing crop yields.
library(aqp)
library(soilDB)
library(reshape2)
library(ggplot2)
library(gridExtra)
library(knitr)
library(cluster)
library(caret)
library(party)
library(vegan)
library(rgdal)
library(sp)
library(mapview)
# projectmapunit data from NASIS
project <- get_projectmapunit_from_NASIS()
project_nodups <- project[!duplicated(project$nationalmusym), c("nationalmusym", "muname")]
# MUPOLYGONs for the Project
gw_pol <- readOGR(dsn = paste0(ownCloud, "glynwood.shp"), layer = "glynwood")
# Soil Survey Areas
ssa <- readOGR(dsn = paste0(geodata, "soils/soilsa_a_nrcs.shp"), layer = "soilsa_a_nrcs")
# Series Extent of Glynwood from SoilWeb
gw_series <- seriesExtent("Glynwood")
# field Data
gw <- read.csv(paste0(ownCloud, "Pts_gnbero_27Jan17.csv"))
gw_sp <- gw
coordinates(gw_sp) <- ~ long + lat
proj4string(gw_sp) <- CRS("+init=epsg:4326")
gw_sp <- spTransform(gw_sp, CRS(proj4string(gw_pol)))
# spatial overlay field data with mupolygons and merge with nasis mapunits
vars <- c("AREASYMBOL", "nationalmu")
gw <- cbind(gw, over(gw_sp, gw_pol)[vars])
gw <- merge(gw, project_nodups, by.x = "nationalmu", by.y = "nationalmusym", all.x = TRUE, sort = FALSE)
# Extract erosion phases from NASIS and combine component and phase
ero_labels <- c("undisturbed", "slight", "moderate", "severe")
gw <- within(gw, {
EroClassFD = factor(EroClassFD, levels = 0:3, labels = ero_labels)
EroClassSIE = factor(EroClassSIE, levels = 0:3, labels = ero_labels)
EroClassFD2 = ifelse(EroClassFD == "severe", "severe", "slight")
EroClassNASIS = NA
EroClassNASIS[grepl("eroded", muname)] = "eroded"
EroClassNASIS[grepl("sev.|severely", muname)] = "sev.eroded"
EroClassNASIS[!grepl("eroded", muname)] = "non.eroded"
soilname2 = soilname
soilname2 = ifelse(soilname2 %in% c("Glynwood", "Morley", "Shinrock", "Rawson", "Mississinewa"), "Glynwood", soilname2)
soilname2 = ifelse(soilname2 %in% c("Blount", "Elliott"), "Blount", soilname2)
soilname2 = ifelse(soilname2 %in% c("Pewamo", "Pandora", "Mermill"), "Pewamo", soilname2)
soilname3 = paste0(soilname2, ifelse(soilname2 == "Glynwood", paste0("-", EroClassFD2), ""))
})
gw <- transform(gw,
rgb = munsell2rgb(mxhue, mxvalue, mxchroma, return_triplets = TRUE)
)
gw_sp <- gw
coordinates(gw_sp) <- ~ long + lat
proj4string(gw_sp) <- CRS("+init=epsg:4326")
gw_sp <- spTransform(gw_sp, CRS(proj4string(gw_pol)))
The geodata from the Glynwood points was extracted from several rasters at various resolutions. The data using to generate the ArcSIE model came from a DEM with a resolution of 15-feet. The other used came from the 10-meter USGS NED, which was primarily resampled from LiDAR.
# Extract data from rasters
library(raster)
# NW files
fd <- paste0(geodata, "project_data/11FIN/PointDataEval/")
dd <- c("slope10",
"procrv10",
"plncrv10",
"maxcrv10",
"mincrv10",
"relpos_r5",
"wetness_mp"
)
fp <- paste0(fd, "Mosaic_NW_pts/Derivatives/", dd, "/", "w001001.adf")
rs <- stack(fp)
names(rs) <- dd
proj4string(rs) <- CRS("+init=epsg:2965")
gd_nw <- extract(rs, gw_sp, df = TRUE, sp = TRUE)@data
gd_nw <- subset(gd_nw, !is.na(slope10))
# SE files
fp <- paste0(fd, "Mosaic_SE_pts/Derivatives/", dd, "/", "w001001.adf")
rs <- stack(fp)
names(rs) <- dd
proj4string(rs) <- CRS("+init=epsg:2965")
gd_se <- extract(rs, gw_sp, df = TRUE, sp = TRUE)@data
gd_se <- subset(gd_se, !is.na(slope10))
gd15ft <- rbind(gd_nw, gd_se)
rm(gd_nw, gd_se)
write.csv(gd15ft, file = paste0(ownCloud, "geodata_15ft_extract.csv"))
# Region 11 files
subset_rasters <- function(input, output) {
cat(paste0(input, "\n"))
gdal_translate(
src_dataset = input,
dst_dataset = output,
projwin = c(bb[1,1], bb[2,2], bb[1,2], bb[2,1]),
of = "GTiff",
a_nodata = -99999,
overwrite = TRUE,
verbose = TRUE
)
}
warp_rasters <- function(input, output){
cat(paste0(input,"\n"))
gdalwarp(
srcfile = input,
dstfile = output,
te = bb,
s_srs = CRSargs(CRS("+init=epsg:5070")),
t_srs = CRSargs(CRS("+init=epsg:5070")),
r = "bilinear",
tr = c(10, 10),
of = "GTiff",
overwrite = TRUE,
verbose = TRUE
)
}
fd <- paste0(geodata, "project_data/11FIN/sdat/")
dd2 <- c("ned10m_11FIN.sdat",
"ned10m_11FIN_aspect5.sdat",
"ned10m_11FIN_slope5.sdat",
"ned10m_11FIN_cupro5.sdat",
"ned10m_11FIN_cutan5.sdat",
"ned30m_11FIN_mvalleys.sdat",
"ned30m_11FIN_wetness.sdat",
"ned30m_11FIN_z2stream.sdat",
"nlcd30m_11FIN_lulc2011.sdat"
)
dd_names <- c("elev", "aspect5", "slope5", "kp5", "kt5", "mvalley", "wetness2", "z2streams", "lulc")
dd <- paste0(fd, dd2)
input <- dd
output <- paste0(geodata, "project_data/11FIN/glynwood/", gsub(".sdat", ".tif", dd2))
bb <- bbox(gw_sp <- spTransform(gw_sp, CRS("+init=epsg:5070")))
mapply(subset_rasters, input, output)
mapply(subset_rasters, input = paste0(geodata, "project_data/11FIN/nlcd30m_11FIN_lulc2011.tif"), output = paste0(geodata, "project_data/11FIN/glynwood/nlcd30m_11FIN_lulc2011.tif"))
mapply(warp_rasters, input = output, output = gsub(".tif", "2.tif", gsub("30m", "10m", output)))
dd <- output
rs10m <- stack(dd[grepl("10m", dd)])
names(rs10m) <- dd_names[1:5]
rs30m <- stack(dd[grepl("30m", dd)])
names(rs30m) <- dd_names[6:9]
rs10m <- stack(gsub(".tif", "2.tif", gsub("30m", "10m", output)))
names(rs10m) <- dd_names
gw <- as.data.frame(extract(rs10m, gw_sp, df = TRUE, sp = TRUE))
gd30m <- as.data.frame(extract(rs30m, gw_sp, df = TRUE))
gw <- cbind(gd10m, gd30m[, -1])
rm(gd10m, gd30m)
# Save data
save(gw, gw_sp, gw_pol, gw_series, ssa, ero_labels, file = paste0(ownCloud, "Pts_gnbero_27Jan17_geodata.RData"))
# Load cached dataset
load(paste0(ownCloud, "Pts_gnbero_27Jan17_geodata.RData"))
ssa <- subset(ssa, areasymbol %in% unique(gw$AREASYMBOL))
ssa <- spTransform(ssa, CRS("+init=epsg:4326"))
# Create interactive map
mapView(gw_series) + ssa + gw_sp
vals2 <- c("EroClassFD", "EroClassNASIS", "nationalmu", "AREASYMBOL")
gw_sub <- gw[vals2]
# Frequency of field observation vs map unit
# Duplicate the data for each REASYMBOL and relabel MLRA
gw_sub2 <- by(gw_sub, gw_sub$nationalmu, function(x) {
x[vals2][1, ]
x[, "AREASYMBOL"] <- "MLRA"
return(x)
})
gw_sub2 <- do.call("rbind", gw_sub2)
gw_sub <- rbind(gw_sub, gw_sub2)
gw_sub$natmuSsaEro <- with(gw_sub,
paste0(nationalmu, "-", AREASYMBOL, "-", EroClassNASIS)
)
test <- xtabs(~ natmuSsaEro + EroClassFD, data = gw_sub)
kable(test, caption = "Frequence by MUSYM-SSA-EROSION")
| undisturbed | slight | moderate | severe | |
|---|---|---|---|---|
| 2t6ll-IN009-sev.eroded | 6 | 19 | 18 | 3 |
| 2t6ll-IN053-sev.eroded | 3 | 13 | 20 | 11 |
| 2t6ll-IN075-sev.eroded | 6 | 8 | 4 | 5 |
| 2t6ll-MLRA-sev.eroded | 15 | 40 | 42 | 19 |
| 2t6lm-IN009-sev.eroded | 2 | 10 | 10 | 0 |
| 2t6lm-IN053-sev.eroded | 1 | 6 | 8 | 12 |
| 2t6lm-IN075-sev.eroded | 3 | 1 | 11 | 9 |
| 2t6lm-IN179-sev.eroded | 5 | 12 | 0 | 13 |
| 2t6lm-MLRA-sev.eroded | 11 | 29 | 29 | 34 |
| 2v4bn-IN069-eroded | 4 | 3 | 6 | 4 |
| 2v4bn-MLRA-eroded | 4 | 3 | 6 | 4 |
| 2v4bp-IN179-eroded | 0 | 3 | 0 | 2 |
| 2v4bp-MLRA-eroded | 0 | 3 | 0 | 2 |
| 5jjt-IN035-sev.eroded | 1 | 1 | 2 | 9 |
| 5jjt-MLRA-sev.eroded | 1 | 1 | 2 | 9 |
| NA-NA-non.eroded | 6 | 9 | 11 | 22 |
kable(round(prop.table(test, 1) * 100), caption = "Percent by MUSYM-SSA-EROSION")
| undisturbed | slight | moderate | severe | |
|---|---|---|---|---|
| 2t6ll-IN009-sev.eroded | 13 | 41 | 39 | 7 |
| 2t6ll-IN053-sev.eroded | 6 | 28 | 43 | 23 |
| 2t6ll-IN075-sev.eroded | 26 | 35 | 17 | 22 |
| 2t6ll-MLRA-sev.eroded | 13 | 34 | 36 | 16 |
| 2t6lm-IN009-sev.eroded | 9 | 45 | 45 | 0 |
| 2t6lm-IN053-sev.eroded | 4 | 22 | 30 | 44 |
| 2t6lm-IN075-sev.eroded | 12 | 4 | 46 | 38 |
| 2t6lm-IN179-sev.eroded | 17 | 40 | 0 | 43 |
| 2t6lm-MLRA-sev.eroded | 11 | 28 | 28 | 33 |
| 2v4bn-IN069-eroded | 24 | 18 | 35 | 24 |
| 2v4bn-MLRA-eroded | 24 | 18 | 35 | 24 |
| 2v4bp-IN179-eroded | 0 | 60 | 0 | 40 |
| 2v4bp-MLRA-eroded | 0 | 60 | 0 | 40 |
| 5jjt-IN035-sev.eroded | 8 | 8 | 15 | 69 |
| 5jjt-MLRA-sev.eroded | 8 | 8 | 15 | 69 |
| NA-NA-non.eroded | 12 | 19 | 23 | 46 |
Several of counties phased severely eroded, are not dominanted by field observations classified as severely eroded.
cm <- confusionMatrix(data = gw$EroClassSIE, reference = gw$EroClassFD)
print(cm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 0 0 0 0
## slight 6 8 11 9
## moderate 24 39 53 37
## severe 7 37 25 44
##
## Overall Statistics
##
## Accuracy : 0.35
## 95% CI : (0.2961, 0.4069)
## No Information Rate : 0.3
## P-Value [Acc > NIR] : 0.03521
##
## Kappa : 0.0767
## Mcnemar's Test P-Value : 1.555e-13
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.0000 0.09524 0.5955
## Specificity 1.0000 0.87963 0.5261
## Pos Pred Value NaN 0.23529 0.3464
## Neg Pred Value 0.8767 0.71429 0.7551
## Prevalence 0.1233 0.28000 0.2967
## Detection Rate 0.0000 0.02667 0.1767
## Detection Prevalence 0.0000 0.11333 0.5100
## Balanced Accuracy 0.5000 0.48743 0.5608
## Class: severe
## Sensitivity 0.4889
## Specificity 0.6714
## Pos Pred Value 0.3894
## Neg Pred Value 0.7540
## Prevalence 0.3000
## Detection Rate 0.1467
## Detection Prevalence 0.3767
## Balanced Accuracy 0.5802
test <- as.data.frame(cm$table)
ggplot(test, aes(x = Reference, y = Freq, fill = Prediction)) +
geom_bar(stat = "identity") +
coord_flip()
The accuracy of the current ArcSIE model appears to be low, according to several metrics. None of the classes achieve a positive predictive value of > 50% for the reference data.
soil_vals <- c("hzthk", "SolumDp", "CaCO3Dp", "claytotest", "firstbtclay", "mxvalue", "mxchroma")
geo_vals1 <- c("SlopeSIE", "ProfCrv", "PlanCrv", "relpos", "wetness")
geo_vals2 <- c("slope5", "kt5", "kp5", "z2streams", "wetness2", "mvalley")
vals <- c(soil_vals, geo_vals1, geo_vals2)
gw_lo1 <- melt(gw, id.vars = "EroClassFD", measure.vars = vals)
gw_lo2 <- melt(gw, id.vars = "EroClassSIE", measure.vars = vals)
names(gw_lo1)[1] <- "EroClass"
gw_lo1$method <- "FD"
names(gw_lo2)[1] <- "EroClass"
gw_lo2$method <- "SIE"
gw_lo <- rbind(gw_lo1, gw_lo2)
ggplot(gw_lo, aes(x = EroClass, y = value)) +
geom_boxplot() +
facet_wrap(~ paste(variable, method), scales="free", ncol = 4) +
coord_flip()
An exploratory analysis shows a considerable amount of overlap exists between the field determined (FD) erosion classes and measurable soil properties. In comparison the FD and SIE (Soil Inference Engine) erosion classes show different patterns within the boxplots, further suggesting that the SIE classes aren’t capturing the field observations accurately.
soil_vals2 <- c("hzthk", "SolumDp", "CaCO3Dp", "claytotest", "firstbtclay") # excluded color, only observed a narrow range thus small differences swamp everthing else
vals <- c(soil_vals2, geo_vals2)
test <- gw[, vals]
test_d <- daisy(scale(test), metric = "gower")
test_mds <- metaMDS(test_d, distance = "gower", autotransform = FALSE, trace = FALSE)
test_pts <- cbind(as.data.frame(test_mds$points), EroClassFD = gw$EroClassFD)
g1 <- ggplot(gw, aes(x = hzthk, y = SolumDp, color = EroClassFD)) +
geom_point(cex = 2, alpha = 0.75) +
theme(aspect.ratio = 1)
g2 <- ggplot(test_pts, aes(x = MDS1, y = MDS2, color = EroClassFD)) +
geom_point(cex = 2, alpha = 0.75) +
theme(aspect.ratio = 1)
grid.arrange(g1, g2, ncol = 2)
According to the scatterplot above it appears that only the severe and slight classes are separatable. The moderate erosion class seems to overlap the most with slight. Both the 15-feet and 10-meter DEM derivatives were evaluated, but the results are similar. The overlap in the FD classes is likely due to bias amongst the soil scientists who collected the data.
test <- subset(gw, !is.na(EroClassFD))
test_ct <- ctree(EroClassFD ~ ., data = test[, c("EroClassFD", soil_vals)])
plot(test_ct)
confusionMatrix(data = predict(test_ct, type = "response"), reference = test$EroClassFD)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 30 20 11 2
## slight 3 48 5 1
## moderate 3 17 67 27
## severe 1 0 7 60
##
## Overall Statistics
##
## Accuracy : 0.6788
## 95% CI : (0.6229, 0.7311)
## No Information Rate : 0.298
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.567
## Mcnemar's Test P-Value : 1.943e-06
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.81081 0.5647 0.7444
## Specificity 0.87547 0.9585 0.7783
## Pos Pred Value 0.47619 0.8421 0.5877
## Neg Pred Value 0.97071 0.8490 0.8777
## Prevalence 0.12252 0.2815 0.2980
## Detection Rate 0.09934 0.1589 0.2219
## Detection Prevalence 0.20861 0.1887 0.3775
## Balanced Accuracy 0.84314 0.7616 0.7614
## Class: severe
## Sensitivity 0.6667
## Specificity 0.9623
## Pos Pred Value 0.8824
## Neg Pred Value 0.8718
## Prevalence 0.2980
## Detection Rate 0.1987
## Detection Prevalence 0.2252
## Balanced Accuracy 0.8145
In order to see if more separation can be achieved amongst the erosion classes a hierachical classifition was peformed. Four unsupervised classes were chosen and manually matched to the FD classes.
test_c <- hclust(test_d, method = "ward")
plot(test_c, labels = gw$upedonid)
rect.hclust(test_c, k = 4)
clusters <- cbind(gw,
test_pts[, 1:2],
clusters = factor(cutree(test_c, k = 4),
levels = c(2, 3, 1, 4),
labels = ero_labels
)
)
xtabs(~ EroClassFD + clusters, data = clusters)
## clusters
## EroClassFD undisturbed slight moderate severe
## undisturbed 14 14 6 3
## slight 12 48 22 3
## moderate 7 24 25 34
## severe 4 0 17 69
g1 <- ggplot(clusters, aes(x = MDS1, y = MDS2, col = EroClassFD)) +
geom_point(cex = 2, alpha = 0.75) +
theme(aspect.ratio = 1)
g2 <- ggplot(clusters, aes(x = MDS1, y = MDS2, col = clusters), main = "test") +
geom_point(cex = 2, alpha = 0.75) +
theme(aspect.ratio = 1)
grid.arrange(g1, g2, ncol = 2)
In comparison the hierarchical clusters have less overlap when viewed along the multidimensional scaled axes, but still does not seem to separate the moderate class.
gw_lo3 <- melt(clusters, id.vars = "clusters", measure.vars = c(soil_vals, geo_vals2))
names(gw_lo3)[1] <- "EroClass"
gw_lo3$method <- "clusters"
gw_lo1 <- subset(gw_lo1, ! variable %in% c("relpos", "wetness", "SlopeSIE"))
gw_lo <- rbind(gw_lo1, gw_lo3)
ggplot(gw_lo, aes(x = EroClass, y = value)) +
geom_boxplot() +
facet_wrap(~ paste(variable, method), scales="free", ncol = 4) +
coord_flip()
test2 <- ctree(clusters ~ ., data = clusters[, c("clusters", soil_vals)])
plot(test2)
confusionMatrix(data = predict(test2, type = "response"), reference = clusters$clusters)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 36 14 1 0
## slight 1 52 10 2
## moderate 0 19 48 23
## severe 0 1 11 84
##
## Overall Statistics
##
## Accuracy : 0.7285
## 95% CI : (0.6746, 0.7778)
## No Information Rate : 0.3609
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6302
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.9730 0.6047 0.6857
## Specificity 0.9434 0.9398 0.8190
## Pos Pred Value 0.7059 0.8000 0.5333
## Neg Pred Value 0.9960 0.8565 0.8962
## Prevalence 0.1225 0.2848 0.2318
## Detection Rate 0.1192 0.1722 0.1589
## Detection Prevalence 0.1689 0.2152 0.2980
## Balanced Accuracy 0.9582 0.7722 0.7523
## Class: severe
## Sensitivity 0.7706
## Specificity 0.9378
## Pos Pred Value 0.8750
## Neg Pred Value 0.8786
## Prevalence 0.3609
## Detection Rate 0.2781
## Detection Prevalence 0.3179
## Balanced Accuracy 0.8542
Below several statistical models are evaluated to see if a more accurate model can be developed.
test3 <- ctree(EroClassFD ~ ., data = clusters[, c("EroClassFD", geo_vals2)])
plot(test3)
confusionMatrix(data = predict(test3, type = "response"), reference = clusters$EroClassFD)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 0 0 0 0
## slight 0 0 0 0
## moderate 18 12 21 13
## severe 19 73 69 77
##
## Overall Statistics
##
## Accuracy : 0.3245
## 95% CI : (0.272, 0.3805)
## No Information Rate : 0.298
## P-Value [Acc > NIR] : 0.1724
##
## Kappa : 0.0377
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.0000 0.0000 0.23333
## Specificity 1.0000 1.0000 0.79717
## Pos Pred Value NaN NaN 0.32812
## Neg Pred Value 0.8775 0.7185 0.71008
## Prevalence 0.1225 0.2815 0.29801
## Detection Rate 0.0000 0.0000 0.06954
## Detection Prevalence 0.0000 0.0000 0.21192
## Balanced Accuracy 0.5000 0.5000 0.51525
## Class: severe
## Sensitivity 0.8556
## Specificity 0.2406
## Pos Pred Value 0.3235
## Neg Pred Value 0.7969
## Prevalence 0.2980
## Detection Rate 0.2550
## Detection Prevalence 0.7881
## Balanced Accuracy 0.5481
test3 <- cforest(as.factor(EroClassFD) ~ ., data = gw[, c("EroClassFD", geo_vals2)])
varimp(test3)
## slope5 kt5 kp5 z2streams wetness2
## 0.032558559 0.014810811 0.006414414 0.004432432 0.016270270
## mvalley
## -0.001873874
confusionMatrix(data = predict(test3, type = "response", OOB = TRUE), reference = gw$EroClassFD)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 3 8 4 4
## slight 8 27 24 17
## moderate 15 25 34 15
## severe 11 25 28 54
##
## Overall Statistics
##
## Accuracy : 0.3907
## 95% CI : (0.3354, 0.4483)
## No Information Rate : 0.298
## P-Value [Acc > NIR] : 0.0003594
##
## Kappa : 0.1505
## Mcnemar's Test P-Value : 0.0194217
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.081081 0.3176 0.3778
## Specificity 0.939623 0.7742 0.7406
## Pos Pred Value 0.157895 0.3553 0.3820
## Neg Pred Value 0.879859 0.7434 0.7371
## Prevalence 0.122517 0.2815 0.2980
## Detection Rate 0.009934 0.0894 0.1126
## Detection Prevalence 0.062914 0.2517 0.2947
## Balanced Accuracy 0.510352 0.5459 0.5592
## Class: severe
## Sensitivity 0.6000
## Specificity 0.6981
## Pos Pred Value 0.4576
## Neg Pred Value 0.8043
## Prevalence 0.2980
## Detection Rate 0.1788
## Detection Prevalence 0.3907
## Balanced Accuracy 0.6491
test4 <- ctree(clusters ~ ., data = clusters[, c("clusters", geo_vals2)])
plot(test4)
confusionMatrix(data = predict(test4, type = "response"), reference = clusters$clusters)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 12 3 1 2
## slight 0 31 1 14
## moderate 13 5 63 12
## severe 12 47 5 81
##
## Overall Statistics
##
## Accuracy : 0.6192
## 95% CI : (0.5618, 0.6742)
## No Information Rate : 0.3609
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4596
## Mcnemar's Test P-Value : 7.988e-08
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.32432 0.3605 0.9000
## Specificity 0.97736 0.9306 0.8707
## Pos Pred Value 0.66667 0.6739 0.6774
## Neg Pred Value 0.91197 0.7852 0.9665
## Prevalence 0.12252 0.2848 0.2318
## Detection Rate 0.03974 0.1026 0.2086
## Detection Prevalence 0.05960 0.1523 0.3079
## Balanced Accuracy 0.65084 0.6455 0.8853
## Class: severe
## Sensitivity 0.7431
## Specificity 0.6684
## Pos Pred Value 0.5586
## Neg Pred Value 0.8217
## Prevalence 0.3609
## Detection Rate 0.2682
## Detection Prevalence 0.4801
## Balanced Accuracy 0.7058
test4 <- cforest(clusters ~ ., data = clusters[, c("clusters", geo_vals2)])
varimp(test4)
## slope5 kt5 kp5 z2streams wetness2 mvalley
## 0.016828829 0.022774775 0.001027027 0.001135135 0.009315315 0.196288288
confusionMatrix(data = predict(test4, type = "response", OOB=TRUE), reference = clusters$clusters)
## Confusion Matrix and Statistics
##
## Reference
## Prediction undisturbed slight moderate severe
## undisturbed 9 1 3 4
## slight 4 40 1 25
## moderate 13 4 61 11
## severe 11 41 5 69
##
## Overall Statistics
##
## Accuracy : 0.5927
## 95% CI : (0.535, 0.6486)
## No Information Rate : 0.3609
## P-Value [Acc > NIR] : 2.413e-16
##
## Kappa : 0.4249
## Mcnemar's Test P-Value : 0.003769
##
## Statistics by Class:
##
## Class: undisturbed Class: slight Class: moderate
## Sensitivity 0.24324 0.4651 0.8714
## Specificity 0.96981 0.8611 0.8793
## Pos Pred Value 0.52941 0.5714 0.6854
## Neg Pred Value 0.90175 0.8017 0.9577
## Prevalence 0.12252 0.2848 0.2318
## Detection Rate 0.02980 0.1325 0.2020
## Detection Prevalence 0.05629 0.2318 0.2947
## Balanced Accuracy 0.60653 0.6631 0.8754
## Class: severe
## Sensitivity 0.6330
## Specificity 0.7047
## Pos Pred Value 0.5476
## Neg Pred Value 0.7727
## Prevalence 0.3609
## Detection Rate 0.2285
## Detection Prevalence 0.4172
## Balanced Accuracy 0.6688
Try and model the soil components and phases separately. The data is highly unbalanced. Create separate logistic regression models for similar minor components.
#gw$weights <- ifelse(gw$soilname == "Glynwood", 0.1, 1)
gw$gw_severe <- ifelse(gw$soilname3 == "Glynwood-severe", TRUE, FALSE)
test4 <- cforest(as.factor(gw_severe) ~ elev + slope5 + kt5 + kp5 + wetness2 + mvalley + z2streams, data = gw)
sort(varimp(test4), decreasing = TRUE)
## slope5 elev wetness2 kt5 z2streams
## 0.0274774775 0.0251171171 0.0157297297 0.0148468468 0.0022162162
## kp5 mvalley
## 0.0009009009 -0.0003063063
confusionMatrix(data = predict(test4, type = "response", OOB = TRUE), reference = gw$gw_severe, positive = "TRUE")
## Confusion Matrix and Statistics
##
## Reference
## Prediction FALSE TRUE
## FALSE 197 65
## TRUE 15 25
##
## Accuracy : 0.7351
## 95% CI : (0.6815, 0.784)
## No Information Rate : 0.702
## P-Value [Acc > NIR] : 0.1152
##
## Kappa : 0.2464
## Mcnemar's Test P-Value : 4.293e-08
##
## Sensitivity : 0.27778
## Specificity : 0.92925
## Pos Pred Value : 0.62500
## Neg Pred Value : 0.75191
## Prevalence : 0.29801
## Detection Rate : 0.08278
## Detection Prevalence : 0.13245
## Balanced Accuracy : 0.60351
##
## 'Positive' Class : TRUE
##
test3 <- glm(as.factor(gw_severe) ~ elev + slope5 + kt5, data = gw, family = "binomial", na.action = na.exclude)
confusionMatrix(data = predict(test3, type = "response") > 0.4, reference = gw$gw_severe, positive = "TRUE")
## Confusion Matrix and Statistics
##
## Reference
## Prediction FALSE TRUE
## FALSE 174 49
## TRUE 35 40
##
## Accuracy : 0.7181
## 95% CI : (0.6634, 0.7685)
## No Information Rate : 0.7013
## P-Value [Acc > NIR] : 0.2865
##
## Kappa : 0.2953
## Mcnemar's Test P-Value : 0.1561
##
## Sensitivity : 0.4494
## Specificity : 0.8325
## Pos Pred Value : 0.5333
## Neg Pred Value : 0.7803
## Prevalence : 0.2987
## Detection Rate : 0.1342
## Detection Prevalence : 0.2517
## Balanced Accuracy : 0.6410
##
## 'Positive' Class : TRUE
##
summary(test3)
##
## Call:
## glm(formula = as.factor(gw_severe) ~ elev + slope5 + kt5, family = "binomial",
## data = gw, na.action = na.exclude)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5512 -0.8366 -0.6028 1.0314 2.4255
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.953655 2.044889 -4.379 1.19e-05 ***
## elev 0.024425 0.007071 3.455 0.000551 ***
## slope5 0.388811 0.109621 3.547 0.000390 ***
## kt5 0.089968 0.027948 3.219 0.001286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 363.39 on 297 degrees of freedom
## Residual deviance: 327.73 on 294 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 335.73
##
## Number of Fisher Scoring iterations: 4
gw$predicted <- predict(test3, type = "response") > 0.4
gw_lo1 <- melt(gw, id.vars = "gw_severe", measure.vars = vals)
gw_lo2 <- melt(gw, id.vars = "predicted", measure.vars = vals)
gw_lo2 <- na.exclude(gw_lo2)
names(gw_lo1)[1] <- "EroClass"
gw_lo1$method <- "FD"
names(gw_lo2)[1] <- "EroClass"
gw_lo2$method <- "GLM"
gw_lo <- rbind(gw_lo1, gw_lo2)
ggplot(gw_lo, aes(x = EroClass, y = value)) +
geom_boxplot() +
facet_wrap(~ paste(variable, method), scales="free", ncol = 4) +
coord_flip()
predfun <- function(model, data) {
v <- predict(model, data, type = "response")
cbind(
p = as.vector(v$fit)
)
}
r <- predict(rs10m, test3, fun = predfun, index = 1:2, progress = "text")
writeRaster(r[[1]], "C:/workspace/severe_erosion.tif", overwrite = TRUE, progress = "text")
r <-predict(rs10m, test4, type='response', progress='text')
writeRaster(r[[1]], "C:/workspace/severe_erosion_cf.tif", overwrite = TRUE, progress = "text")